In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
df = pd.read_csv("environment-raw-2021.csv", skiprows=4)
# Indicators for our air pollution and mortality rates based on the year.
pm25_indicator = "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)"
mort_indicator = "Mortality rate attributed to household and ambient air pollution, age-standardized (per 100,000 population)"
year = "2016"
# Filter both indicators so we don't have any bad data
pm25_data = df[df["Indicator Name"] == pm25_indicator][["Country Name", year]].dropna()
mort_data = df[df["Indicator Name"] == mort_indicator][["Country Name", year]].dropna()
# Merge on country
merged = pd.merge(pm25_data, mort_data, on="Country Name", suffixes=("_PM2.5", "_Mortality"))
# Rename for clarity
merged.columns = ["Country", "PM2.5", "Mortality"]
# Let's use our data now to make this a scatter plot.
plt.figure(figsize=(12, 8))
plt.scatter(merged["PM2.5"], merged["Mortality"], alpha=0.7)
# Let's choose some countries to highlight now. United States and China are big ones, let's additionally do Nepal as this is a country with high air pollution, and Brazil as this is a country with lots of deforestation
for country in ["United States", "China", "Brazil", "Nepal", ]:
row = merged[merged["Country"] == country]
if not row.empty:
plt.scatter(row["PM2.5"], row["Mortality"], color="red", s=100)
plt.text(row["PM2.5"] + 0.1, row["Mortality"] + 1, country, fontsize=10)
plt.title("Relationship Between PM2.5 Exposure and Mortality Rate (2016)", fontsize=14)
plt.xlabel("PM2.5 (µg/m³)")
plt.ylabel("Mortality Rate (per 100,000)")
plt.grid(True, linestyle="--", alpha=0.5)
plt.tight_layout()
plt.show()
C:\Users\theog\anaconda3\Lib\site-packages\matplotlib\text.py:897: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead x = float(self.convert_xunits(self._x)) C:\Users\theog\anaconda3\Lib\site-packages\matplotlib\text.py:898: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead y = float(self.convert_yunits(self._y)) C:\Users\theog\anaconda3\Lib\site-packages\matplotlib\text.py:756: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead posx = float(self.convert_xunits(self._x)) C:\Users\theog\anaconda3\Lib\site-packages\matplotlib\text.py:757: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead posy = float(self.convert_yunits(self._y))
In [2]:
#Let's make this interactive now so we can see specific countries
# Filter and clean data again
pm25_data = df[df["Indicator Name"] == pm25_indicator][["Country Name", year]].dropna()
mort_data = df[df["Indicator Name"] == mort_indicator][["Country Name", year]].dropna()
# Merge both indicators on Country
merged = pd.merge(pm25_data, mort_data, on="Country Name", suffixes=("_PM2.5", "_Mortality"))
merged.columns = ["Country", "PM2.5", "Mortality"]
# Create an interactive scatter plot
fig = px.scatter(
merged,
x="PM2.5",
y="Mortality",
size="PM2.5",
color="Mortality",
hover_name="Country",
title="Interactive PM2.5 Exposure vs. Mortality Rate by Country (2016)",
labels={"PM2.5": "PM2.5 (µg/m³)", "Mortality": "Mortality Rate (per 100,000)"},
color_continuous_scale="RdYlBu_r",
)
fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
fig.show()
In [ ]: